{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 基础配置"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "PATH_TRAIN = 'train.txt'  # path of the training data\n",
    "START = '<start>'  # sentinel standing in for the tag before a sentence begins\n",
    "\n",
    "\n",
    "def log(p):\n",
    "    \"\"\"Return the natural log of ``p + 1e-9``; the offset keeps log(0) finite.\"\"\"\n",
    "    return np.log(p + 1e-9)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据读取、预处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>word</th>\n",
       "      <th>tag</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Newsweek</td>\n",
       "      <td>NNP</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>,</td>\n",
       "      <td>,</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>trying</td>\n",
       "      <td>VBG</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>to</td>\n",
       "      <td>TO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>keep</td>\n",
       "      <td>VB</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       word  tag\n",
       "0  Newsweek  NNP\n",
       "1         ,    ,\n",
       "2    trying  VBG\n",
       "3        to   TO\n",
       "4      keep   VB"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Every corpus line is split on '/' into a (word, tag) pair of strings.\n",
    "train = np.loadtxt(fname=PATH_TRAIN, delimiter='/', dtype=str)\n",
    "# Show the first five pairs as a labelled table.\n",
    "pd.DataFrame(data=train, columns=['word', 'tag']).head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(18977, 53)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct words / tags in sorted order; the position in each list is the id.\n",
    "words = sorted(set(train[:, 0]))\n",
    "tags = sorted(set(train[:, 1]))\n",
    "\n",
    "W, T = len(words), len(tags)  # vocabulary size, number of distinct tags\n",
    "\n",
    "# Lookup tables between symbols and their integer ids.\n",
    "word2id = {word: idx for idx, word in enumerate(words)}\n",
    "tag2id = {tag: idx for idx, tag in enumerate(tags)}\n",
    "id2tag = dict(enumerate(tags))\n",
    "\n",
    "W, T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# HMM模型训练\n",
    "发射概率矩阵、起始概率矩阵、转移概率矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Matrix initialisation: these hold raw counts first and are turned into\n",
    "# log-probabilities at the end of the cell.\n",
    "emit_p = np.zeros((T, W))  # emission_probability, indexed [tag, word]\n",
    "start_p = np.zeros(T)  # start_probability, indexed [tag]\n",
    "trans_p = np.zeros((T, T))  # transition_probability, indexed [previous tag, tag]\n",
    "\n",
    "# Training: count emissions, sentence-initial tags and tag-to-tag transitions.\n",
    "prev_tag = START  # tag of the previous token (START at a sentence boundary)\n",
    "for word, tag in train:\n",
    "    wid, tid = word2id[word], tag2id[tag]\n",
    "    emit_p[tid, wid] += 1\n",
    "    if prev_tag == START:\n",
    "        start_p[tid] += 1\n",
    "    else:\n",
    "        trans_p[tag2id[prev_tag], tid] += 1\n",
    "    prev_tag = START if word == '.' else tag  # a '.' token closes the sentence\n",
    "\n",
    "# Counts --> log-probabilities, normalising each row by its total.\n",
    "# A row without any count (e.g. a tag that is never followed by another tag)\n",
    "# would otherwise be 0/0 = NaN, so the denominator is clamped to at least 1;\n",
    "# such a row then becomes log(1e-9) everywhere, like any other unseen event.\n",
    "start_p = log(start_p / max(start_p.sum(), 1))\n",
    "emit_p = log(emit_p / np.maximum(emit_p.sum(axis=1, keepdims=True), 1))\n",
    "trans_p = log(trans_p / np.maximum(trans_p.sum(axis=1, keepdims=True), 1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 发射概率矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>developers</th>\n",
       "      <th>developing</th>\n",
       "      <th>development</th>\n",
       "      <th>developments</th>\n",
       "      <th>develops</th>\n",
       "      <th>deviant</th>\n",
       "      <th>deviation</th>\n",
       "      <th>device</th>\n",
       "      <th>devices</th>\n",
       "      <th>devils</th>\n",
       "      <th>devise</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>CC</th>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CD</th>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DT</th>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EX</th>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>FW</th>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IN</th>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>JJ</th>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-9.475534</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-9.475534</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    developers  developing  development  developments   develops    deviant  \\\n",
       "CC  -20.723266  -20.723266   -20.723266    -20.723266 -20.723266 -20.723266   \n",
       "CD  -20.723266  -20.723266   -20.723266    -20.723266 -20.723266 -20.723266   \n",
       "DT  -20.723266  -20.723266   -20.723266    -20.723266 -20.723266 -20.723266   \n",
       "EX  -20.723266  -20.723266   -20.723266    -20.723266 -20.723266 -20.723266   \n",
       "FW  -20.723266  -20.723266   -20.723266    -20.723266 -20.723266 -20.723266   \n",
       "IN  -20.723266  -20.723266   -20.723266    -20.723266 -20.723266 -20.723266   \n",
       "JJ  -20.723266   -9.475534   -20.723266    -20.723266 -20.723266  -9.475534   \n",
       "\n",
       "    deviation     device    devices     devils     devise  \n",
       "CC -20.723266 -20.723266 -20.723266 -20.723266 -20.723266  \n",
       "CD -20.723266 -20.723266 -20.723266 -20.723266 -20.723266  \n",
       "DT -20.723266 -20.723266 -20.723266 -20.723266 -20.723266  \n",
       "EX -20.723266 -20.723266 -20.723266 -20.723266 -20.723266  \n",
       "FW -20.723266 -20.723266 -20.723266 -20.723266 -20.723266  \n",
       "IN -20.723266 -20.723266 -20.723266 -20.723266 -20.723266  \n",
       "JJ -20.723266 -20.723266 -20.723266 -20.723266 -20.723266  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Small window of the emission log-probability table (rows: tags, columns: words).\n",
    "pd.DataFrame(emit_p, index=tags, columns=words).iloc[7:14, 9988:9999]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 起始概率矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>$</th>\n",
       "      <th>''</th>\n",
       "      <th>(</th>\n",
       "      <th>)</th>\n",
       "      <th>,</th>\n",
       "      <th>.</th>\n",
       "      <th>:</th>\n",
       "      <th>CC</th>\n",
       "      <th>CD</th>\n",
       "      <th>DT</th>\n",
       "      <th>EX</th>\n",
       "      <th>FW</th>\n",
       "      <th>IN</th>\n",
       "      <th>JJ</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-2.754958</td>\n",
       "      <td>-5.127924</td>\n",
       "      <td>-5.127924</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-6.360067</td>\n",
       "      <td>-2.958871</td>\n",
       "      <td>-4.736445</td>\n",
       "      <td>-1.526625</td>\n",
       "      <td>-5.908083</td>\n",
       "      <td>-8.999117</td>\n",
       "      <td>-2.192296</td>\n",
       "      <td>-3.305393</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           $        ''         (         )          ,          .         :  \\\n",
       "0 -20.723266 -2.754958 -5.127924 -5.127924 -20.723266 -20.723266 -6.360067   \n",
       "\n",
       "         CC        CD        DT        EX        FW        IN        JJ  \n",
       "0 -2.958871 -4.736445 -1.526625 -5.908083 -8.999117 -2.192296 -3.305393  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Start log-probabilities of the first 14 tags, laid out as a single row.\n",
    "pd.DataFrame(start_p[np.newaxis, :14], columns=tags[:14])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 隐状态转移概率矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CC</th>\n",
       "      <th>CD</th>\n",
       "      <th>DT</th>\n",
       "      <th>EX</th>\n",
       "      <th>FW</th>\n",
       "      <th>IN</th>\n",
       "      <th>JJ</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>CC</th>\n",
       "      <td>-7.812780</td>\n",
       "      <td>-3.312973</td>\n",
       "      <td>-2.160294</td>\n",
       "      <td>-5.210093</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-2.996542</td>\n",
       "      <td>-2.214361</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CD</th>\n",
       "      <td>-3.879582</td>\n",
       "      <td>-1.666299</td>\n",
       "      <td>-3.737806</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-8.896854</td>\n",
       "      <td>-2.462315</td>\n",
       "      <td>-3.276461</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DT</th>\n",
       "      <td>-7.454313</td>\n",
       "      <td>-3.811479</td>\n",
       "      <td>-6.461062</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-8.658282</td>\n",
       "      <td>-4.739620</td>\n",
       "      <td>-1.521804</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EX</th>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>FW</th>\n",
       "      <td>-3.970292</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-1.405343</td>\n",
       "      <td>-3.277145</td>\n",
       "      <td>-3.970292</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IN</th>\n",
       "      <td>-6.569239</td>\n",
       "      <td>-2.779579</td>\n",
       "      <td>-1.119830</td>\n",
       "      <td>-6.758481</td>\n",
       "      <td>-8.327093</td>\n",
       "      <td>-3.947574</td>\n",
       "      <td>-2.330645</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>JJ</th>\n",
       "      <td>-4.055012</td>\n",
       "      <td>-4.100268</td>\n",
       "      <td>-5.604345</td>\n",
       "      <td>-20.723266</td>\n",
       "      <td>-9.475534</td>\n",
       "      <td>-2.871603</td>\n",
       "      <td>-2.599282</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           CC         CD         DT         EX         FW         IN  \\\n",
       "CC  -7.812780  -3.312973  -2.160294  -5.210093 -20.723266  -2.996542   \n",
       "CD  -3.879582  -1.666299  -3.737806 -20.723266  -8.896854  -2.462315   \n",
       "DT  -7.454313  -3.811479  -6.461062 -20.723266  -8.658282  -4.739620   \n",
       "EX -20.723266 -20.723266 -20.723266 -20.723266 -20.723266 -20.723266   \n",
       "FW  -3.970292 -20.723266 -20.723266 -20.723266  -1.405343  -3.277145   \n",
       "IN  -6.569239  -2.779579  -1.119830  -6.758481  -8.327093  -3.947574   \n",
       "JJ  -4.055012  -4.100268  -5.604345 -20.723266  -9.475534  -2.871603   \n",
       "\n",
       "           JJ  \n",
       "CC  -2.214361  \n",
       "CD  -3.276461  \n",
       "DT  -1.521804  \n",
       "EX -20.723266  \n",
       "FW  -3.970292  \n",
       "IN  -2.330645  \n",
       "JJ  -2.599282  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Window of the transition log-probability table (row: previous tag, column: next tag).\n",
    "pd.DataFrame(trans_p, index=tags, columns=tags).iloc[7:14, 7:14]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 维特比算法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Example sentence, already tokenised with spaces, split into a list of tokens.\n",
    "sentence = (\n",
    "    'Newsweek , trying to keep pace with rival Time magazine '\n",
    "    ', announced new advertising rates for 1990 and said it '\n",
    "    'will introduce a new incentive plan for advertisers .'\n",
    ").split()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "obs = [word2id[w] for w in sentence]  # observation sequence as word ids (raises KeyError for a word not in word2id)\n",
    "le = len(obs)  # sentence length\n",
    "\n",
    "# dp[i][j]: best log-probability of any tag sequence for words 0..i that ends in tag j\n",
    "dp = np.full((le, T), -1e99)\n",
    "# path[i][j]: tag of word i - 1 on that best sequence (back-pointer)\n",
    "path = np.zeros((le, T), dtype=int)\n",
    "\n",
    "# First word: start log-probability plus emission log-probability.\n",
    "for j in range(T):\n",
    "    dp[0][j] = start_p[j] + emit_p[j][obs[0]]\n",
    "\n",
    "# Remaining words: extend the best path into each tag j from every previous tag k.\n",
    "for i in range(1, le):\n",
    "    for j in range(T):\n",
    "        dp[i][j], path[i][j] = max(\n",
    "            (dp[i - 1][k] + trans_p[k][j] + emit_p[j][obs[i]], k)\n",
    "            for k in range(T))\n",
    "\n",
    "# Hidden sequence: begin with the best-scoring tag of the last word ...\n",
    "states = [np.argmax(dp[le - 1])]\n",
    "# ... then follow the back-pointers from the last word to the first.\n",
    "for i in range(le - 2, -1, -1):\n",
    "    # states[-1] is the tag just recovered for word i + 1; its back-pointer is\n",
    "    # the tag of word i. (Indexing with states[0] would always look up the\n",
    "    # final tag and produce a wrong sequence.)\n",
    "    states.append(path[i + 1][states[-1]])\n",
    "states = [id2tag[i] for i in states[::-1]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 动态规划矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Newsweek</th>\n",
       "      <th>,</th>\n",
       "      <th>trying</th>\n",
       "      <th>to</th>\n",
       "      <th>keep</th>\n",
       "      <th>pace</th>\n",
       "      <th>with</th>\n",
       "      <th>rival</th>\n",
       "      <th>Time</th>\n",
       "      <th>magazine</th>\n",
       "      <th>...</th>\n",
       "      <th>it</th>\n",
       "      <th>will</th>\n",
       "      <th>introduce</th>\n",
       "      <th>a</th>\n",
       "      <th>new</th>\n",
       "      <th>incentive</th>\n",
       "      <th>plan</th>\n",
       "      <th>for</th>\n",
       "      <th>advertisers</th>\n",
       "      <th>.</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>CC</th>\n",
       "      <td>-23.682136</td>\n",
       "      <td>-33.189414</td>\n",
       "      <td>-34.425319</td>\n",
       "      <td>-43.722532</td>\n",
       "      <td>-49.053794</td>\n",
       "      <td>-52.198943</td>\n",
       "      <td>-61.138477</td>\n",
       "      <td>-69.012975</td>\n",
       "      <td>-76.521668</td>\n",
       "      <td>-86.013875</td>\n",
       "      <td>...</td>\n",
       "      <td>-150.422061</td>\n",
       "      <td>-153.126019</td>\n",
       "      <td>-159.302255</td>\n",
       "      <td>-162.802500</td>\n",
       "      <td>-168.498196</td>\n",
       "      <td>-170.312420</td>\n",
       "      <td>-178.714511</td>\n",
       "      <td>-186.586369</td>\n",
       "      <td>-193.832408</td>\n",
       "      <td>-199.626142</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CD</th>\n",
       "      <td>-25.459711</td>\n",
       "      <td>-33.913098</td>\n",
       "      <td>-35.729313</td>\n",
       "      <td>-43.308098</td>\n",
       "      <td>-44.150993</td>\n",
       "      <td>-51.225494</td>\n",
       "      <td>-63.049824</td>\n",
       "      <td>-65.223315</td>\n",
       "      <td>-76.963899</td>\n",
       "      <td>-87.140310</td>\n",
       "      <td>...</td>\n",
       "      <td>-147.232958</td>\n",
       "      <td>-156.622523</td>\n",
       "      <td>-168.066622</td>\n",
       "      <td>-161.829051</td>\n",
       "      <td>-164.855362</td>\n",
       "      <td>-170.357677</td>\n",
       "      <td>-180.625858</td>\n",
       "      <td>-188.497717</td>\n",
       "      <td>-190.042748</td>\n",
       "      <td>-203.080089</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DT</th>\n",
       "      <td>-22.249890</td>\n",
       "      <td>-35.901230</td>\n",
       "      <td>-33.964612</td>\n",
       "      <td>-40.943210</td>\n",
       "      <td>-43.846132</td>\n",
       "      <td>-48.983210</td>\n",
       "      <td>-62.951194</td>\n",
       "      <td>-63.563566</td>\n",
       "      <td>-78.334385</td>\n",
       "      <td>-87.826592</td>\n",
       "      <td>...</td>\n",
       "      <td>-146.129777</td>\n",
       "      <td>-153.067178</td>\n",
       "      <td>-157.222815</td>\n",
       "      <td>-140.320617</td>\n",
       "      <td>-167.504945</td>\n",
       "      <td>-171.861754</td>\n",
       "      <td>-180.527227</td>\n",
       "      <td>-188.399086</td>\n",
       "      <td>-188.382999</td>\n",
       "      <td>-201.104852</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EX</th>\n",
       "      <td>-26.631348</td>\n",
       "      <td>-48.000536</td>\n",
       "      <td>-37.508169</td>\n",
       "      <td>-60.000289</td>\n",
       "      <td>-57.276186</td>\n",
       "      <td>-56.130763</td>\n",
       "      <td>-67.051070</td>\n",
       "      <td>-69.202217</td>\n",
       "      <td>-82.434260</td>\n",
       "      <td>-91.926467</td>\n",
       "      <td>...</td>\n",
       "      <td>-151.743816</td>\n",
       "      <td>-169.123776</td>\n",
       "      <td>-172.067867</td>\n",
       "      <td>-166.734320</td>\n",
       "      <td>-176.947744</td>\n",
       "      <td>-186.980675</td>\n",
       "      <td>-184.627103</td>\n",
       "      <td>-192.498962</td>\n",
       "      <td>-194.021649</td>\n",
       "      <td>-206.258131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>FW</th>\n",
       "      <td>-29.722383</td>\n",
       "      <td>-37.568935</td>\n",
       "      <td>-39.562291</td>\n",
       "      <td>-60.000289</td>\n",
       "      <td>-58.844798</td>\n",
       "      <td>-56.130763</td>\n",
       "      <td>-67.051070</td>\n",
       "      <td>-70.770829</td>\n",
       "      <td>-82.339164</td>\n",
       "      <td>-90.796147</td>\n",
       "      <td>...</td>\n",
       "      <td>-165.093395</td>\n",
       "      <td>-169.123776</td>\n",
       "      <td>-172.067867</td>\n",
       "      <td>-149.981346</td>\n",
       "      <td>-169.702165</td>\n",
       "      <td>-175.732943</td>\n",
       "      <td>-184.627103</td>\n",
       "      <td>-192.498962</td>\n",
       "      <td>-195.590262</td>\n",
       "      <td>-217.471142</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IN</th>\n",
       "      <td>-22.915562</td>\n",
       "      <td>-33.152509</td>\n",
       "      <td>-34.503503</td>\n",
       "      <td>-29.794439</td>\n",
       "      <td>-46.933531</td>\n",
       "      <td>-49.649192</td>\n",
       "      <td>-41.720470</td>\n",
       "      <td>-66.391310</td>\n",
       "      <td>-74.686097</td>\n",
       "      <td>-84.178304</td>\n",
       "      <td>...</td>\n",
       "      <td>-146.504719</td>\n",
       "      <td>-151.747329</td>\n",
       "      <td>-159.302255</td>\n",
       "      <td>-149.465997</td>\n",
       "      <td>-165.783503</td>\n",
       "      <td>-169.129012</td>\n",
       "      <td>-176.878940</td>\n",
       "      <td>-166.539903</td>\n",
       "      <td>-191.210743</td>\n",
       "      <td>-198.248116</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>JJ</th>\n",
       "      <td>-24.028659</td>\n",
       "      <td>-34.741624</td>\n",
       "      <td>-35.073657</td>\n",
       "      <td>-30.660768</td>\n",
       "      <td>-45.083503</td>\n",
       "      <td>-49.932290</td>\n",
       "      <td>-62.690105</td>\n",
       "      <td>-52.140365</td>\n",
       "      <td>-75.462913</td>\n",
       "      <td>-87.565503</td>\n",
       "      <td>...</td>\n",
       "      <td>-147.292381</td>\n",
       "      <td>-153.487032</td>\n",
       "      <td>-159.302255</td>\n",
       "      <td>-148.594974</td>\n",
       "      <td>-145.534143</td>\n",
       "      <td>-168.856691</td>\n",
       "      <td>-180.266139</td>\n",
       "      <td>-188.137998</td>\n",
       "      <td>-189.593814</td>\n",
       "      <td>-200.920605</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>7 rows × 29 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Newsweek          ,     trying         to       keep       pace  \\\n",
       "CC -23.682136 -33.189414 -34.425319 -43.722532 -49.053794 -52.198943   \n",
       "CD -25.459711 -33.913098 -35.729313 -43.308098 -44.150993 -51.225494   \n",
       "DT -22.249890 -35.901230 -33.964612 -40.943210 -43.846132 -48.983210   \n",
       "EX -26.631348 -48.000536 -37.508169 -60.000289 -57.276186 -56.130763   \n",
       "FW -29.722383 -37.568935 -39.562291 -60.000289 -58.844798 -56.130763   \n",
       "IN -22.915562 -33.152509 -34.503503 -29.794439 -46.933531 -49.649192   \n",
       "JJ -24.028659 -34.741624 -35.073657 -30.660768 -45.083503 -49.932290   \n",
       "\n",
       "         with      rival       Time   magazine     ...              it  \\\n",
       "CC -61.138477 -69.012975 -76.521668 -86.013875     ...     -150.422061   \n",
       "CD -63.049824 -65.223315 -76.963899 -87.140310     ...     -147.232958   \n",
       "DT -62.951194 -63.563566 -78.334385 -87.826592     ...     -146.129777   \n",
       "EX -67.051070 -69.202217 -82.434260 -91.926467     ...     -151.743816   \n",
       "FW -67.051070 -70.770829 -82.339164 -90.796147     ...     -165.093395   \n",
       "IN -41.720470 -66.391310 -74.686097 -84.178304     ...     -146.504719   \n",
       "JJ -62.690105 -52.140365 -75.462913 -87.565503     ...     -147.292381   \n",
       "\n",
       "          will   introduce           a         new   incentive        plan  \\\n",
       "CC -153.126019 -159.302255 -162.802500 -168.498196 -170.312420 -178.714511   \n",
       "CD -156.622523 -168.066622 -161.829051 -164.855362 -170.357677 -180.625858   \n",
       "DT -153.067178 -157.222815 -140.320617 -167.504945 -171.861754 -180.527227   \n",
       "EX -169.123776 -172.067867 -166.734320 -176.947744 -186.980675 -184.627103   \n",
       "FW -169.123776 -172.067867 -149.981346 -169.702165 -175.732943 -184.627103   \n",
       "IN -151.747329 -159.302255 -149.465997 -165.783503 -169.129012 -176.878940   \n",
       "JJ -153.487032 -159.302255 -148.594974 -145.534143 -168.856691 -180.266139   \n",
       "\n",
       "           for  advertisers           .  \n",
       "CC -186.586369  -193.832408 -199.626142  \n",
       "CD -188.497717  -190.042748 -203.080089  \n",
       "DT -188.399086  -188.382999 -201.104852  \n",
       "EX -192.498962  -194.021649 -206.258131  \n",
       "FW -192.498962  -195.590262 -217.471142  \n",
       "IN -166.539903  -191.210743 -198.248116  \n",
       "JJ -188.137998  -189.593814 -200.920605  \n",
       "\n",
       "[7 rows x 29 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Viterbi scores of tags 7..13 (rows) at every word of the sentence (columns).\n",
    "pd.DataFrame(dp[:, 7:14].T, index=tags[7:14], columns=sentence)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 记录节点转移矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Newsweek</th>\n",
       "      <th>,</th>\n",
       "      <th>trying</th>\n",
       "      <th>to</th>\n",
       "      <th>keep</th>\n",
       "      <th>pace</th>\n",
       "      <th>with</th>\n",
       "      <th>rival</th>\n",
       "      <th>Time</th>\n",
       "      <th>magazine</th>\n",
       "      <th>...</th>\n",
       "      <th>it</th>\n",
       "      <th>will</th>\n",
       "      <th>introduce</th>\n",
       "      <th>a</th>\n",
       "      <th>new</th>\n",
       "      <th>incentive</th>\n",
       "      <th>plan</th>\n",
       "      <th>for</th>\n",
       "      <th>advertisers</th>\n",
       "      <th>.</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>$</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>21</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>''</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>(</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>21</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>)</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>13</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>21</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>30</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>,</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>.</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>:</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>13</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CC</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CD</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>21</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DT</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EX</th>\n",
       "      <td>0</td>\n",
       "      <td>52</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>12</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>FW</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>12</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>21</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IN</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>JJ</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>JJR</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>JJS</th>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "      <td>12</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>JJ|NN</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>JJ|RB</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>12</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LS</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MD</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>13</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NN</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NNP</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>21</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NNPS</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>21</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>21</td>\n",
       "      <td>39</td>\n",
       "      <td>12</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NNS</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NNS|NN</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>12</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>13</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>45</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NN|NNS</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PDT</th>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>12</td>\n",
       "      <td>39</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>41</td>\n",
       "      <td>12</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>45</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>POS</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>21</td>\n",
       "      <td>12</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>21</td>\n",
       "      <td>...</td>\n",
       "      <td>21</td>\n",
       "      <td>28</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>13</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PRP</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PRP$</th>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RB</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RBR</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RBR|JJR</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>12</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RBS</th>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>12</td>\n",
       "      <td>9</td>\n",
       "      <td>27</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>45</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RB|IN</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RP</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>13</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>13</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>30</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SYM</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>21</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TO</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>13</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>UH</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>5</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>5</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>VB</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>VBD</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>12</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>VBG</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>VBG|NN</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>12</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>VBN</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>VBN|JJ</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>12</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>VBP</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>13</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>VBZ</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>13</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>VB|NN</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WDT</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WP</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WP$</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>12</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WRB</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>20</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>``</th>\n",
       "      <td>0</td>\n",
       "      <td>21</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>37</td>\n",
       "      <td>39</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>40</td>\n",
       "      <td>28</td>\n",
       "      <td>19</td>\n",
       "      <td>39</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>12</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>53 rows × 29 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         Newsweek   ,  trying  to  keep  pace  with  rival  Time  magazine  \\\n",
       "$               0  21       4  41    37    39    20     12    13        21   \n",
       "''              0  21       4  41    37    39    20     12    13        20   \n",
       "(               0  21       4  41    37    39    20     12    20        21   \n",
       ")               0  21       4  41    13    39    20     12    20        21   \n",
       ",               0  21       4  41    37    39    20     12    20        20   \n",
       ".               0  21       4  41    37    39    20     12    20        20   \n",
       ":               0  21       4  41    13    39    20     12    20        20   \n",
       "CC              0  21       4  41    37    39    20     12    20        20   \n",
       "CD              0  21       4  41    37    39    20     12    13        21   \n",
       "DT              0  21       4  41    37    39    20     12    20        20   \n",
       "EX              0  52       4  41    12    39    20     12    20        20   \n",
       "FW              0  21       4  41    12    39    20     12    13        21   \n",
       "IN              0  21       4  41    37    39    20     12    20        20   \n",
       "JJ              0  21       4  41    37    39    20     12    13        20   \n",
       "JJR             0  21       4  41    37    39    20     12    20        20   \n",
       "JJS             0   9       4   9    12    39    20     12    13        20   \n",
       "JJ|NN           0  21       4  41    37    39    20     12    20        20   \n",
       "JJ|RB           0  21       4  41    12    39    20     12    13        20   \n",
       "LS              0  21       4  41    37    39    20     12    20        20   \n",
       "MD              0  21       4  41    13    39    20     12    20        20   \n",
       "NN              0  21       4  41    37    39    20     12    13        20   \n",
       "NNP             0  21       4  41    37    39    20     12    13        21   \n",
       "NNPS            0  21       4  41    37    39     9     12    13        21   \n",
       "NNS             0  21       4  41    37    39    20     12    13        20   \n",
       "NNS|NN          0  21       4  41    12    39    20     12    13        20   \n",
       "NN|NNS          0  21       4  41    37    39    20     12    13        20   \n",
       "PDT             0  12       4  41    12    39    12     12    41        12   \n",
       "POS             0  21       4  21    12    39    20     12    20        21   \n",
       "PRP             0  21       4  41    37    39    20     12    20        20   \n",
       "PRP$            0  12       4  41    37    39    20     12    13        20   \n",
       "RB              0  21       4  41    37    39    20     12    20        20   \n",
       "RBR             0  21       4  41    37    39    20     12    20        20   \n",
       "RBR|JJR         0  21       4  41    12    39    20     12    13        20   \n",
       "RBS             0   9       4  41    37    39     9     12     9        27   \n",
       "RB|IN           0  21       4  41    37    39    20     12    13        20   \n",
       "RP              0  21       4  41    13    39    20     12    20        20   \n",
       "SYM             0  21       4  41    37    39    20     12    13        21   \n",
       "TO              0  21       4  41    13    39    20     12    13        20   \n",
       "UH              0  21       4  41    37    39     5     12    13         5   \n",
       "VB              0  21       4  41    37    39    20     12    20        20   \n",
       "VBD             0  21       4  41    12    39    20     12    20        20   \n",
       "VBG             0  21       4  41    37    39    20     12    20        20   \n",
       "VBG|NN          0  21       4  41    12    39    20     12    13        20   \n",
       "VBN             0  21       4  41    37    39    20     12    20        20   \n",
       "VBN|JJ          0  21       4  41    12    39    20     12    13        20   \n",
       "VBP             0  21       4  41    13    39    20     12    20        20   \n",
       "VBZ             0  21       4  41    13    39    20     12    20        20   \n",
       "VB|NN           0  21       4  41    37    39    20     12    13        20   \n",
       "WDT             0  21       4  41    37    39    20     12    20        20   \n",
       "WP              0  21       4  41    37    39    20     12    20        20   \n",
       "WP$             0  21       4  41    12    39    20     12    20        20   \n",
       "WRB             0  21       4  41    37    39    20     12    20        20   \n",
       "``              0  21       4  41    37    39    20     12    13        20   \n",
       "\n",
       "        ...  it  will  introduce   a  new  incentive  plan  for  advertisers  \\\n",
       "$       ...  40    28         20  39    9         13    20   20           12   \n",
       "''      ...  40    28         19  39    9         13    20   20           12   \n",
       "(       ...  40    28         19  39    9         13    20   20           12   \n",
       ")       ...  40    28         19  39    9         13    20   20           30   \n",
       ",       ...  40    28         19  39    9         13    20   20           12   \n",
       ".       ...  40    28         19  39    9         13    20   20           12   \n",
       ":       ...  40    28         20  39    9         13    20   20           12   \n",
       "CC      ...  40    28         19  39    9         13    20   20           12   \n",
       "CD      ...  40    28         20  39    9         13    20   20           12   \n",
       "DT      ...  40    28         19  39    9         13    20   20           12   \n",
       "EX      ...  40    28         20  39   12         13    20   20           12   \n",
       "FW      ...  40    28         20  39    9         13    20   20           12   \n",
       "IN      ...  40    28         19  39    9         13    20   20           12   \n",
       "JJ      ...  40    28         19  39    9         13    20   20           12   \n",
       "JJR     ...  40    28         20  39    9         13    20   20           12   \n",
       "JJS     ...  40    28         19  39    9         13    20   20           12   \n",
       "JJ|NN   ...  40    28         19  39    9         13    20   20           12   \n",
       "JJ|RB   ...  40    28         19  39   12         13    20   20           12   \n",
       "LS      ...  40    28         19  39    9         13    20   20           12   \n",
       "MD      ...  40    28         20  39    9         13    20   20           12   \n",
       "NN      ...  40    28         20  39    9         13    20   20           12   \n",
       "NNP     ...  40    28         19  39    9         13    20   20           12   \n",
       "NNPS    ...  40    28         19  39    9         13    21   39           12   \n",
       "NNS     ...  40    28         19  39    9         13    20   20           12   \n",
       "NNS|NN  ...  40    28         19  39   13         13    20   45           12   \n",
       "NN|NNS  ...  40    28         19  39    9         13    20   39           12   \n",
       "PDT     ...  40    28         19  39   12         13    20   45           12   \n",
       "POS     ...  21    28         20  39   13         13    20   20           12   \n",
       "PRP     ...  40    28         19  39    9         13    20   20           12   \n",
       "PRP$    ...  40    28         19  39    9         13    20   20           12   \n",
       "RB      ...  40    28         19  39    9         13    20   20           12   \n",
       "RBR     ...  40    28         19  39    9         13    20   20           12   \n",
       "RBR|JJR ...  40    28         19  39   12         13    20   20           12   \n",
       "RBS     ...  40    28         19  39    9         13    20   45           12   \n",
       "RB|IN   ...  40    28         19  39    9         13    20   20           12   \n",
       "RP      ...  40    28         20  39   13         13    20   20           30   \n",
       "SYM     ...  40    28         19  39    9         13    20   20           12   \n",
       "TO      ...  40    28         19  39    9         13    20   20           12   \n",
       "UH      ...  40    28         19  39    9         13     5    5           12   \n",
       "VB      ...  40    28         19  39    9         13    20   20           12   \n",
       "VBD     ...  40    28         19  39    9         13    20   20           12   \n",
       "VBG     ...  40    28         19  39    9         13    20   20           12   \n",
       "VBG|NN  ...  40    28         19  39   12         13    20   20           12   \n",
       "VBN     ...  40    28         19  39    9         13    20   20           12   \n",
       "VBN|JJ  ...  40    28         19  39   12         13    20   20           12   \n",
       "VBP     ...  40    28         20  39    9         13    20   20           12   \n",
       "VBZ     ...  40    28         20  39    9         13    20   20           12   \n",
       "VB|NN   ...  40    28         19  39    9         13    20   20           12   \n",
       "WDT     ...  40    28         20  39    9         13    20   20           12   \n",
       "WP      ...  40    28         20  39    9         13    20   20           12   \n",
       "WP$     ...  40    28         20  39   12         13    20   20           12   \n",
       "WRB     ...  40    28         20  39    9         13    20   20           12   \n",
       "``      ...  40    28         19  39    9         13    20   20           12   \n",
       "\n",
       "          .  \n",
       "$        23  \n",
       "''       23  \n",
       "(        23  \n",
       ")        23  \n",
       ",        23  \n",
       ".        23  \n",
       ":        23  \n",
       "CC       23  \n",
       "CD       23  \n",
       "DT       23  \n",
       "EX       23  \n",
       "FW       21  \n",
       "IN       23  \n",
       "JJ       23  \n",
       "JJR      23  \n",
       "JJS      23  \n",
       "JJ|NN    23  \n",
       "JJ|RB    23  \n",
       "LS       23  \n",
       "MD       23  \n",
       "NN       23  \n",
       "NNP      23  \n",
       "NNPS     21  \n",
       "NNS      23  \n",
       "NNS|NN   23  \n",
       "NN|NNS   23  \n",
       "PDT      23  \n",
       "POS      23  \n",
       "PRP      23  \n",
       "PRP$     23  \n",
       "RB       23  \n",
       "RBR      23  \n",
       "RBR|JJR  23  \n",
       "RBS      23  \n",
       "RB|IN    23  \n",
       "RP       23  \n",
       "SYM      23  \n",
       "TO       23  \n",
       "UH       23  \n",
       "VB       23  \n",
       "VBD      23  \n",
       "VBG      23  \n",
       "VBG|NN   23  \n",
       "VBN      23  \n",
       "VBN|JJ   23  \n",
       "VBP      23  \n",
       "VBZ      23  \n",
       "VB|NN    23  \n",
       "WDT      23  \n",
       "WP       23  \n",
       "WP$      23  \n",
       "WRB      23  \n",
       "``       23  \n",
       "\n",
       "[53 rows x 29 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tabulate path.T with one row per tag and one column per word of the sentence.\n",
    "# NOTE(review): entries look like integer tag indices (Viterbi backpointers?) - confirm.\n",
    "pd.DataFrame(data=path.T, index=tags, columns=sentence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Newsweek NNP\n",
      ", ,\n",
      "trying VBG\n",
      "to TO\n",
      "keep VB\n",
      "pace NN\n",
      "with IN\n",
      "rival NN\n",
      "Time NN\n",
      "magazine NN\n",
      ", ,\n",
      "announced VBD\n",
      "new JJ\n",
      "advertising NN\n",
      "rates NNS\n",
      "for IN\n",
      "1990 CD\n",
      "and NN\n",
      "said VBD\n",
      "it PRP\n",
      "will MD\n",
      "introduce VB\n",
      "a DT\n",
      "new JJ\n",
      "incentive NN\n",
      "plan NN\n",
      "for IN\n",
      "advertisers NNS\n",
      ". .\n"
     ]
    }
   ],
   "source": [
    "# Print each word of the sentence beside its entry in `states`, one pair per line.\n",
    "for token, label in zip(sentence, states):\n",
    "    print(token, label)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
